library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom 1.0.1 ✔ rsample 1.1.0
## ✔ dials 1.0.0 ✔ tune 1.0.0
## ✔ infer 1.0.3 ✔ workflows 1.1.0
## ✔ modeldata 1.0.1 ✔ workflowsets 1.0.0
## ✔ parsnip 1.0.2 ✔ yardstick 1.1.0
## ✔ recipes 1.0.1
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ recipes::fixed() masks stringr::fixed()
## ✖ dplyr::lag() masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step() masks stats::step()
## • Dig deeper into tidy modeling with R at https://www.tmwr.org
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(ggcorrplot)
library(Factoshiny)
## Loading required package: FactoMineR
## Loading required package: shiny
##
## Attaching package: 'shiny'
##
## The following object is masked from 'package:infer':
##
## observe
##
## Loading required package: FactoInvestigate
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Attaching package: 'arules'
##
## The following object is masked from 'package:recipes':
##
## discretize
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(RColorBrewer)
library(viridis)
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
##
## The following object is masked from 'package:scales':
##
## viridis_pal
library(ggrepel)
options(ggrepel.max.overlaps = Inf)
Read data
df <- read_csv("~/Documentos/GitHub/MVA-Project/df_preprocessed.csv")
## Rows: 367645 Columns: 42
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): Name, Sex, Event, Equipment, Place, Tested, Country, Federation, ...
## dbl (31): Age, BodyweightKg, Squat1Kg, Squat2Kg, Squat3Kg, Bench1Kg, Bench2...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
numeric_cols <- df %>%
select_if(is.numeric)
cols_zero_var <- df %>%
select(colnames(numeric_cols)[1:13])
round(cor(cols_zero_var, use = "complete.obs"), 1) %>%
ggcorrplot()
Remember that these correlations are using the negative values of the
lift. Let’s change that
round(cor(cols_zero_var %>% mutate_if(is.numeric, abs), use = "complete.obs"), 1) %>%
ggcorrplot(method = "circle", type = "lower")
Makes sense
pca_rec <- recipe(~., data = cols_zero_var %>% mutate_if(is.numeric, abs)) %>%
step_normalize(all_predictors()) %>%
step_pca(all_predictors())
pca_prep <- prep(pca_rec)
tidied_pca <- tidy(pca_prep, 2)
tidied_pca %>%
filter(component %in% paste0("PC", 1:9)) %>%
mutate(component = fct_inorder(component)) %>%
ggplot(aes(value, terms, fill = terms)) +
geom_col(show.legend = FALSE) +
facet_wrap(~component, nrow = 1) +
labs(y = NULL)
summary(pca_prep$steps[[2]]$res)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.221 1.03064 0.87961 0.59273 0.46153 0.43331 0.12534
## Proportion of Variance 0.798 0.08171 0.05952 0.02702 0.01639 0.01444 0.00121
## Cumulative Proportion 0.798 0.87971 0.93922 0.96625 0.98263 0.99708 0.99829
## PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 0.07941 0.07397 0.06822 0.04907 0.04226 0.04081
## Proportion of Variance 0.00049 0.00042 0.00036 0.00019 0.00014 0.00013
## Cumulative Proportion 0.99877 0.99919 0.99955 0.99973 0.99987 1.00000
juice(pca_prep) %>%
ggplot(aes(PC1, PC2)) +
geom_point(alpha = 0.7, size = 2) +
labs(color = NULL)
pca_cumm <- as_data_frame(summary(pca_prep$steps[[2]]$res)$importance)[3, ] %>%
gather()
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
pca_cumm %>%
mutate(key = fct_inorder(key)) %>%
filter(key %in% paste0("PC", 1:5)) %>%
ggplot(aes(x = key, y = value)) + geom_col(fill = "#91E5F8")
With factoextra
res.pca <- PCA(cols_zero_var %>% mutate_if(is.numeric, abs), graph = FALSE)
eig <- get_eig(res.pca)
eig
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 10.373974176 79.79980135 79.79980
## Dim.2 1.062223239 8.17094799 87.97075
## Dim.3 0.773708414 5.95160318 93.92235
## Dim.4 0.351324444 2.70249572 96.62485
## Dim.5 0.213010307 1.63854082 98.26339
## Dim.6 0.187759475 1.44430365 99.70769
## Dim.7 0.015709441 0.12084185 99.82853
## Dim.8 0.006306282 0.04850986 99.87704
## Dim.9 0.005471419 0.04208784 99.91913
## Dim.10 0.004654173 0.03580133 99.95493
## Dim.11 0.002407607 0.01852006 99.97345
## Dim.12 0.001785825 0.01373712 99.98719
## Dim.13 0.001665200 0.01280923 100.00000
fviz_screeplot(res.pca, addlabels = TRUE, ylim = c(0, 50))
var <- get_pca_var(res.pca)
var
## Principal Component Analysis Results for variables
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the variables"
## 2 "$cor" "Correlations between variables and dimensions"
## 3 "$cos2" "Cos2 for the variables"
## 4 "$contrib" "contributions of the variables"
fviz_pca_var(res.pca, col.var="contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
)
# Contributions of variables to PC1
fviz_contrib(res.pca, choice = "var", axes = 1, top = 10)
# Contributions of variables to PC2
fviz_contrib(res.pca, choice = "var", axes = 2, top = 10)
fviz_pca_ind(res.pca,
label = "none", # hide individual labels
habillage = as_factor(df$Sex), # color by groups
palette = c("#00AFBB", "#E7B800", "#FC4E07"),
addEllipses = TRUE, # Concentration ellipses
alpha.ind = 0.3
)
df_mca <- df %>%
mutate(across(ends_with("?"), ~factor(.x, labels = c("Not lifted", "Lifted")))) %>%
select(Sex, Equipment, Tested, Federation, ends_with("?")) %>%
mutate(across(1:5, ~factor(.x)))
#%>% sample_n(1000)
#result <- Factoshiny(df_mca)
res.MCA <- MCA(df_mca,graph=FALSE)
plot.MCA(res.MCA, choix='var')
plot.MCA(res.MCA,invisible= 'ind',selectMod= 'cos2 0.2',label =c('var'), graph.type="ggplot") +
geom_text_repel() + ggtitle("Mca factor map with cos2 >= 0.2")
#Values
summary(res.MCA)
##
## Call:
## MCA(X = df_mca, graph = FALSE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 0.165 0.133 0.099 0.087 0.083 0.081 0.081
## % of var. 4.049 3.259 2.433 2.139 2.046 1.998 1.979
## Cumulative % of var. 4.049 7.309 9.742 11.881 13.928 15.925 17.904
## Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 Dim.13 Dim.14
## Variance 0.079 0.077 0.077 0.077 0.077 0.077 0.077
## % of var. 1.931 1.897 1.894 1.890 1.889 1.888 1.887
## Cumulative % of var. 19.835 21.733 23.626 25.516 27.405 29.293 31.180
## Dim.15 Dim.16 Dim.17 Dim.18 Dim.19 Dim.20 Dim.21
## Variance 0.077 0.077 0.077 0.077 0.077 0.077 0.077
## % of var. 1.887 1.887 1.887 1.887 1.887 1.887 1.887
## Cumulative % of var. 33.066 34.953 36.840 38.727 40.614 42.500 44.387
## Dim.22 Dim.23 Dim.24 Dim.25 Dim.26 Dim.27 Dim.28
## Variance 0.077 0.077 0.077 0.077 0.077 0.077 0.077
## % of var. 1.887 1.887 1.887 1.887 1.887 1.887 1.887
## Cumulative % of var. 46.274 48.161 50.048 51.934 53.821 55.708 57.595
## Dim.29 Dim.30 Dim.31 Dim.32 Dim.33 Dim.34 Dim.35
## Variance 0.077 0.077 0.077 0.077 0.077 0.077 0.077
## % of var. 1.887 1.887 1.887 1.887 1.887 1.887 1.887
## Cumulative % of var. 59.482 61.368 63.255 65.142 67.029 68.915 70.802
## Dim.36 Dim.37 Dim.38 Dim.39 Dim.40 Dim.41 Dim.42
## Variance 0.077 0.077 0.077 0.077 0.077 0.076 0.074
## % of var. 1.887 1.887 1.887 1.887 1.887 1.862 1.806
## Cumulative % of var. 72.689 74.576 76.463 78.349 80.236 82.098 83.904
## Dim.43 Dim.44 Dim.45 Dim.46 Dim.47 Dim.48 Dim.49
## Variance 0.073 0.073 0.072 0.072 0.070 0.068 0.066
## % of var. 1.790 1.786 1.775 1.767 1.711 1.665 1.621
## Cumulative % of var. 85.694 87.480 89.255 91.021 92.732 94.397 96.018
## Dim.50 Dim.51 Dim.52 Dim.53
## Variance 0.066 0.060 0.032 0.005
## % of var. 1.609 1.472 0.790 0.111
## Cumulative % of var. 97.627 99.098 99.889 100.000
##
## Individuals (the 10 first)
## Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3 ctr cos2
## 1 | 0.102 0.000 0.005 | 0.067 0.000 0.002 | 0.105 0.000 0.005
## 2 | 0.135 0.000 0.007 | 0.107 0.000 0.005 | 0.143 0.000 0.008
## 3 | -0.129 0.000 0.008 | -0.163 0.000 0.013 | 0.401 0.000 0.075
## 4 | 0.098 0.000 0.004 | 0.090 0.000 0.003 | -0.188 0.000 0.014
## 5 | -0.129 0.000 0.008 | -0.163 0.000 0.013 | 0.401 0.000 0.075
## 6 | -0.129 0.000 0.008 | -0.163 0.000 0.013 | 0.401 0.000 0.075
## 7 | 0.164 0.000 0.012 | 0.156 0.000 0.011 | -0.076 0.000 0.003
## 8 | 0.267 0.000 0.030 | 0.254 0.000 0.027 | -0.245 0.000 0.025
## 9 | 0.078 0.000 0.003 | 0.038 0.000 0.001 | 0.103 0.000 0.005
## 10 | 0.013 0.000 0.000 | -0.032 0.000 0.000 | 0.302 0.000 0.043
##
## 1 |
## 2 |
## 3 |
## 4 |
## 5 |
## 6 |
## 7 |
## 8 |
## 9 |
## 10 |
##
## Categories (the 10 first)
## Dim.1 ctr cos2 v.test Dim.2 ctr cos2
## F | -0.140 0.301 0.010 -59.557 | -0.105 0.209 0.005
## M | 0.069 0.148 0.010 59.557 | 0.052 0.103 0.005
## Raw | -0.408 5.188 0.336 -351.208 | -0.304 3.563 0.185
## Single-ply | 0.819 10.356 0.332 349.537 | 0.617 7.298 0.189
## Wraps | 2.194 0.146 0.003 33.999 | -2.381 0.214 0.004
## No | 6.728 26.829 0.583 463.040 | -5.449 21.861 0.383
## Yes | -0.087 0.346 0.583 -463.040 | 0.070 0.282 0.383
## AEP | -0.407 0.066 0.001 -22.861 | -0.294 0.042 0.001
## APU | -0.674 0.216 0.005 -41.500 | -0.723 0.309 0.005
## AsianPF | 0.567 0.113 0.002 30.037 | 0.887 0.346 0.006
## v.test Dim.3 ctr cos2 v.test
## F -44.521 | -0.192 0.940 0.018 -81.558 |
## M 44.521 | 0.094 0.463 0.018 81.558 |
## Raw -261.122 | -0.326 5.498 0.214 -280.270 |
## Single-ply 263.255 | 0.657 11.071 0.213 280.152 |
## Wraps -36.905 | 0.305 0.005 0.000 4.732 |
## No -375.012 | -0.042 0.002 0.000 -2.921 |
## Yes 375.012 | 0.001 0.000 0.000 2.921 |
## AEP -16.498 | -1.109 0.812 0.011 -62.319 |
## APU -44.512 | -0.024 0.000 0.000 -1.465 |
## AsianPF 47.034 | 1.794 1.893 0.025 95.104 |
##
## Categorical variables (eta2)
## Dim.1 Dim.2 Dim.3
## Sex | 0.010 0.005 0.018 |
## Equipment | 0.337 0.191 0.214 |
## Tested | 0.583 0.383 0.000 |
## Federation | 0.759 0.667 0.400 |
## LiftedSquat1Kg? | 0.029 0.040 0.015 |
## LiftedSquat2Kg? | 0.081 0.068 0.045 |
## LiftedSquat3Kg? | 0.068 0.051 0.113 |
## LiftedBench1Kg? | 0.019 0.053 0.000 |
## LiftedBench2Kg? | 0.036 0.053 0.092 |
## LiftedBench3Kg? | 0.010 0.008 0.243 |
#Description of the axis
#eigenvalues > 1/n_categories
#measures the degree of association between variable categories and a particular axis
fviz_mca_var(res.MCA, alpha.var = "cos2", select.var = list(cos2 = 0.15),
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE, # Avoid text overlapping
ggtheme = theme_minimal())
#visualize the inertia kept by dimensions
fviz_screeplot(res.MCA, addlabels = TRUE, ylim = c(0, 45))
#quality of representation
fviz_cos2(res.MCA, choice = "var", axes = 1:2)
df_mfa <- df %>%
mutate(across(ends_with("?"), ~factor(.x, labels = c("Not lifted", "Lifted")))) %>%
select(Sex, Equipment, Tested, Federation, ends_with("?"), where(is.numeric)) %>%
mutate(across(1:5, ~factor(.x))) %>%
mutate_if(is.numeric, abs)
#We have to decide groups
# Lifter associated health and condition
# Weight tried
# Quality of the lifts
df_mfa <- df_mfa %>%
select(Age, BodyweightKg,
contains("WeightTried"),
ends_with("?"))
res.mfa <- MFA(df_mfa,
group = c(2, 9, 9),
type = c("s", "s", "n"),
name.group = c("Lifter condition", "Weight tried", "Quality of the lift"),
graph = FALSE)
head(get_eigenvalue(res.mfa))
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 1.5975177 18.317722 18.31772
## Dim.2 1.0184891 11.678368 29.99609
## Dim.3 0.9031305 10.355624 40.35171
## Dim.4 0.6876887 7.885290 48.23700
## Dim.5 0.6424357 7.366403 55.60341
## Dim.6 0.6316142 7.242319 62.84573
fviz_screeplot(res.mfa)
group_mfa <- get_mfa_var(res.mfa, "group")
fviz_mfa_var(res.mfa, "group")
fviz_contrib(res.mfa, "group", axes = 1)
fviz_contrib(res.mfa, "group", axes = 2)
get_mfa_var(res.mfa, "quanti.var")
## Multiple Factor Analysis results for quantitative variables
## ===================================================
## Name Description
## 1 "$coord" "Coordinates"
## 2 "$cos2" "Cos2, quality of representation"
## 3 "$contrib" "Contributions"
fviz_mfa_var(res.mfa, "quanti.var", palette = "jco",
col.var.sup = "violet", repel = TRUE,
geom = c("point", "text"), legend = "bottom")
fviz_contrib(res.mfa, choice = "quanti.var", axes = 1, top = 20,
palette = "jco")
fviz_contrib(res.mfa, choice = "quanti.var", axes = 2, top = 20,
palette = "jco")
fviz_mfa_var(res.mfa, "quanti.var", col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
col.var.sup = "violet", repel = TRUE,
geom = c("point", "text"))
fviz_mfa_var(res.mfa, col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
col.var.sup = "violet", repel = TRUE, geom = c("point", "text"))
fviz_cos2(res.mfa, choice = "quanti.var", axes = 1)
#The graph of partial axes shows the relationship between the principal axes
#of the MFA and the ones obtained from analyzing each group using either a PCA
#(for groups of continuous variables) or a MCA (for qualitative variables).
fviz_mfa_axes(res.mfa, geom = c("point", "text"), graph.type="ggplot", repel = T)
######Association Rules######
itemFrequencyPlot(transactions(df_mca), topN = 20,
col = brewer.pal(8, 'BuPu'),
main = 'Relative Item Frequency Plot',
type = "relative",
ylab = "Item Frequency (Relative)")
#https://www.datarlabs.com/post/market-basket-optimisation-using-association-rule-mining
df_eclat <- transactions(df_mca)
rules_eclat = eclat(data = df_eclat,
parameter = list(support = 0.6, minlen = 2))
## Eclat
##
## parameter specification:
## tidLists support minlen maxlen target ext
## FALSE 0.6 2 10 frequent itemsets TRUE
##
## algorithmic control:
## sparse sort verbose
## 7 -2 TRUE
##
## Absolute minimum support count: 220587
##
## create itemset ...
## set transactions ...[66 item(s), 367645 transaction(s)] done [0.24s].
## sorting and recoding items ... [10 item(s)] done [0.02s].
## creating bit matrix ... [10 row(s), 367645 column(s)] done [0.02s].
## writing ... [88 set(s)] done [0.00s].
## Creating S4 object ... done [0.00s].
inspect(sort(rules_eclat, by = 'support')[1:10])
## items support count
## [1] {Tested=Yes,
## LiftedDeadlift1Kg?=Lifted} 0.9401814 345653
## [2] {Tested=Yes,
## LiftedBench1Kg?=Lifted} 0.8896653 327081
## [3] {LiftedBench1Kg?=Lifted,
## LiftedDeadlift1Kg?=Lifted} 0.8604551 316342
## [4] {Tested=Yes,
## LiftedSquat1Kg?=Lifted} 0.8566226 314933
## [5] {Tested=Yes,
## LiftedDeadlift2Kg?=Lifted} 0.8558637 314654
## [6] {Tested=Yes,
## LiftedBench1Kg?=Lifted,
## LiftedDeadlift1Kg?=Lifted} 0.8495097 312318
## [7] {LiftedDeadlift1Kg?=Lifted,
## LiftedDeadlift2Kg?=Lifted} 0.8291803 304844
## [8] {LiftedSquat1Kg?=Lifted,
## LiftedDeadlift1Kg?=Lifted} 0.8288430 304720
## [9] {Tested=Yes,
## LiftedDeadlift1Kg?=Lifted,
## LiftedDeadlift2Kg?=Lifted} 0.8203539 301599
## [10] {Tested=Yes,
## LiftedSquat1Kg?=Lifted,
## LiftedDeadlift1Kg?=Lifted} 0.8190619 301124
#Tested -> may imply better tournament so better lifters
df_apriori <- transactions(df_mca)
#colors just because I can
rules_apriori <- apriori(df_apriori, parameter = list(supp = 0.1, conf = 0.8))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.1 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 36764
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[66 item(s), 367645 transaction(s)] done [0.21s].
## sorting and recoding items ... [22 item(s)] done [0.03s].
## creating transaction tree ... done [0.34s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 10
## Warning in apriori(df_apriori, parameter = list(supp = 0.1, conf = 0.8)): Mining
## stopped (maxlen reached). Only patterns up to a length of 10 returned!
## done [0.04s].
## writing ... [33968 rule(s)] done [0.59s].
## creating S4 object ... done [0.07s].
inspect(rules_apriori[1:20])
## lhs rhs support confidence coverage lift count
## [1] {} => {LiftedSquat2Kg?=Lifted} 0.8027771 0.8027771 1.0000000 1.0000000 295137
## [2] {} => {LiftedDeadlift2Kg?=Lifted} 0.8652341 0.8652341 1.0000000 1.0000000 318099
## [3] {} => {LiftedSquat1Kg?=Lifted} 0.8670484 0.8670484 1.0000000 1.0000000 318766
## [4] {} => {LiftedBench1Kg?=Lifted} 0.9013641 0.9013641 1.0000000 1.0000000 331382
## [5] {} => {LiftedDeadlift1Kg?=Lifted} 0.9520761 0.9520761 1.0000000 1.0000000 350026
## [6] {} => {Tested=Yes} 0.9872785 0.9872785 1.0000000 1.0000000 362968
## [7] {LiftedSquat1Kg?=Not lifted} => {LiftedDeadlift2Kg?=Lifted} 0.1093773 0.8226846 0.1329516 0.9508231 40212
## [8] {LiftedSquat1Kg?=Not lifted} => {LiftedBench1Kg?=Lifted} 0.1130438 0.8502629 0.1329516 0.9433068 41560
## [9] {LiftedSquat1Kg?=Not lifted} => {LiftedDeadlift1Kg?=Lifted} 0.1232330 0.9269011 0.1329516 0.9735579 45306
## [10] {LiftedSquat1Kg?=Not lifted} => {Tested=Yes} 0.1306559 0.9827329 0.1329516 0.9953958 48035
## [11] {LiftedDeadlift2Kg?=Not lifted} => {LiftedSquat1Kg?=Lifted} 0.1111915 0.8250717 0.1347659 0.9515866 40879
## [12] {LiftedDeadlift2Kg?=Not lifted} => {LiftedBench1Kg?=Lifted} 0.1165146 0.8645703 0.1347659 0.9591799 42836
## [13] {LiftedDeadlift2Kg?=Not lifted} => {LiftedDeadlift1Kg?=Lifted} 0.1228957 0.9119202 0.1347659 0.9578229 45182
## [14] {LiftedDeadlift2Kg?=Not lifted} => {Tested=Yes} 0.1314148 0.9751342 0.1347659 0.9876992 48314
## [15] {LiftedSquat2Kg?=Not lifted} => {LiftedDeadlift2Kg?=Lifted} 0.1589033 0.8057042 0.1972229 0.9311979 58420
## [16] {LiftedSquat2Kg?=Not lifted} => {LiftedSquat1Kg?=Lifted} 0.1599641 0.8110829 0.1972229 0.9354529 58810
## [17] {LiftedSquat2Kg?=Not lifted} => {LiftedBench1Kg?=Lifted} 0.1701070 0.8625117 0.1972229 0.9568960 62539
## [18] {LiftedSquat2Kg?=Not lifted} => {LiftedDeadlift1Kg?=Lifted} 0.1837261 0.9315662 0.1972229 0.9784577 67546
## [19] {LiftedSquat2Kg?=Not lifted} => {Tested=Yes} 0.1926206 0.9766646 0.1972229 0.9892494 70816
## [20] {LiftedBench2Kg?=Not lifted} => {LiftedDeadlift2Kg?=Lifted} 0.1789797 0.8200523 0.2182540 0.9477809 65801
#Quick check
df_mca %>% select(contains("1")) %>% group_by(`LiftedSquat1Kg?`) %>% summarize(n = n())
## # A tibble: 2 × 2
## `LiftedSquat1Kg?` n
## <fct> <int>
## 1 Not lifted 48879
## 2 Lifted 318766
df_mca %>% select(contains("1")) %>% group_by(`LiftedBench1Kg?`) %>% summarize(n = n())
## # A tibble: 2 × 2
## `LiftedBench1Kg?` n
## <fct> <int>
## 1 Not lifted 36263
## 2 Lifted 331382
df_mca %>% select(contains("1")) %>% group_by(`LiftedDeadlift1Kg?`) %>% summarize(n = n())
## # A tibble: 2 × 2
## `LiftedDeadlift1Kg?` n
## <fct> <int>
## 1 Not lifted 17619
## 2 Lifted 350026
df_mca %>% select(contains("2")) %>% group_by(`LiftedSquat2Kg?`) %>% summarize(n = n())
## # A tibble: 2 × 2
## `LiftedSquat2Kg?` n
## <fct> <int>
## 1 Not lifted 72508
## 2 Lifted 295137
df_mca %>% select(contains("2")) %>% group_by(`LiftedBench2Kg?`) %>% summarize(n = n())
## # A tibble: 2 × 2
## `LiftedBench2Kg?` n
## <fct> <int>
## 1 Not lifted 80240
## 2 Lifted 287405
df_mca %>% select(contains("2")) %>% group_by(`LiftedDeadlift2Kg?`) %>% summarize(n = n())
## # A tibble: 2 × 2
## `LiftedDeadlift2Kg?` n
## <fct> <int>
## 1 Not lifted 49546
## 2 Lifted 318099
df_mca %>% select(contains("3")) %>% group_by(`LiftedSquat3Kg?`) %>% summarize(n = n())
## # A tibble: 2 × 2
## `LiftedSquat3Kg?` n
## <fct> <int>
## 1 Not lifted 137288
## 2 Lifted 230357
df_mca %>% select(contains("3")) %>% group_by(`LiftedBench3Kg?`) %>% summarize(n = n())
## # A tibble: 2 × 2
## `LiftedBench3Kg?` n
## <fct> <int>
## 1 Not lifted 195228
## 2 Lifted 172417
df_mca %>% select(contains("3")) %>% group_by(`LiftedDeadlift3Kg?`) %>% summarize(n = n())
## # A tibble: 2 × 2
## `LiftedDeadlift3Kg?` n
## <fct> <int>
## 1 Not lifted 155407
## 2 Lifted 212238
df %>%
ggplot(aes(x = TotalKg, fill = Sex)) + geom_boxplot() +
facet_wrap(~Sex) + scale_fill_viridis_d()
df %>%
filter(!is.na(Country)) %>%
ggplot(aes(x = TotalKg, fill = Country)) +
geom_boxplot(show.legend = F) +
facet_wrap(~Country) +
scale_fill_viridis_d()
df %>%
group_by(Date) %>%
summarize(m = median(TotalKg)) %>%
ggplot(aes(x = Date, y = m)) + geom_point() + geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
df %>%
group_by(Tested) %>%
summarize(n = n()) %>%
ggplot(aes(x = Tested, y = n)) + geom_col()
round(cor(numeric_cols, use = "complete.obs"), 1) %>%
ggcorrplot(ggtheme = ggplot2::theme_gray,
colors = c("#6D9EC1", "white", "#E46726"),
lab = T, type = "lower")
##World map over total df
world <- map_data("world")
world %>%
merge(df %>%
group_by(Country) %>%
summarize(n = mean(TotalKg, na.rm = T)) %>%
mutate(country = Country),
by.x = "region", by.y = "country", all.x = T) %>%
arrange(group, order) %>%
ggplot(aes(x = long, y = lat, group = group, fill = n)) +
geom_polygon(color = "white", size = 0.2) +
scale_fill_viridis(option="mako", na.value = "gray90") +
theme_minimal() +
theme(axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank()) +
ggtitle("Mean KG lifted by country")
world %>%
merge(df %>%
group_by(Country) %>%
summarize(n = mean(Age, na.rm = T)) %>%
mutate(country = Country),
by.x = "region", by.y = "country", all.x = T) %>%
arrange(group, order) %>%
ggplot(aes(x = long, y = lat, group = group, fill = n)) +
geom_polygon(color = "white", size = 0.2) +
scale_fill_viridis(option="mako", na.value = "gray90") +
theme_minimal() +
theme(axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank()) +
ggtitle("Mean Age by country")